2  scTCRseq: Compile and format data

Some scRNA and scTCR tumor timepoints were relabelled after this analysis was conducted. The corrected timepoints are: patient 101, W20 -> W24; patient 103, W20 -> W25.

2.1 Set up workspace

# Load libraries
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
Loading required package: SeuratObject
Loading required package: sp

Attaching package: 'SeuratObject'

The following objects are masked from 'package:base':

    intersect, t
library(dplyr)
library(scRepertoire)

Functions to create the custom definition of clones

# Blank out the C gene of every contig, in line with the clone definition used
# in this analysis (V gene + J gene + CDR3 amino acids, without the C gene).
#
# df: a data frame of contig annotations.
# Returns `df` with its `c_gene` column set to "" in every row; the column is
#   created when it is absent.
to_empty <- function(df) {
  # Size the replacement to the table: assigning a bare "" fails on a zero-row
  # data frame ("replacement has 1 row, data has 0").
  df$c_gene <- rep("", nrow(df))
  df
}

# Count the number of TRA and TRB chains per barcode.
#
# df: a data frame with character columns `TCR1` and `TCR2`, in which several
#   chains for one barcode are separated by ";".
# Returns `df` with two added numeric columns, `TCR1_count` and `TCR2_count`:
#   the number of ";"-separated entries, or 0 where the value is NA.
count_TRA_TRB <- function(df) {
  # Entries = separators + 1. str_count() gives NA for an NA input, and that
  # NA is turned into a count of zero.
  count_chains <- function(chains) {
    replace_na(str_count(chains, fixed(";")) + 1, 0)
  }

  # Return the result directly rather than through a trailing assignment,
  # which would only return it invisibly.
  df %>%
    mutate(TCR1_count = count_chains(TCR1),
           TCR2_count = count_chains(TCR2))
}

# Remove barcodes with an ambiguous chain composition: 2A+2B, 3A/XB or XA/3B.
#
# df: a data frame carrying the `TCR1_count` and `TCR2_count` columns produced
#   by count_TRA_TRB().
# Returns `df` without the rows that have exactly two chains of both types, or
#   three or more chains of either type.
remove_multi_chains <- function(df) {
  # Return the filtered table directly rather than through a trailing
  # assignment, which would only return it invisibly.
  df %>%
    filter(
      !(TCR1_count == 2 & TCR2_count == 2), # 2A+2B
      TCR1_count < 3,                       # 3A/XB
      TCR2_count < 3                        # XA/3B
    )
}

# Strip the ".NA" placeholder left where the C gene would be in TRA.
#
# df: a data frame with a character column `TCR1`.
# Returns `df` with every ".NA" that sits directly before a ";" or at the very
#   end of a `TCR1` string removed.
rm_na_TCR1 <- function(df) {
  # Two alternatives: ".NA" followed by ";" (a lookahead, so the ";" itself is
  # kept), or ".NA" at the end of the string.
  mutate(df, TCR1 = str_replace_all(TCR1, "(\\.NA(?=;))|(\\.NA$)", ""))
}

# Strip the ".NA" placeholder left where the C or D gene would be in TRB
# (the D gene is not seen in bulkTCR data anyway).
#
# df: a data frame with a character column `TCR2`.
# Returns `df` with every ".NA" removed from `TCR2` when it is followed by ";",
#   followed by a literal ".", or at the very end of the string.
rm_na_TCR2 <- function(df) {
  df <- df %>%
    # The middle lookahead must be an escaped dot. Unescaped, "." matches any
    # character, so the pattern would strip ".NA" in front of anything rather
    # than only in front of the next "."-separated gene field.
    mutate(TCR2 = str_replace_all(TCR2, "(\\.NA(?=;))|(\\.NA(?=\\.))|(\\.NA$)", ""))
  return(df)
}


# Create the clone-definition columns: vjaa, alpha, beta.
#
# df: a data frame with columns `TCR1`, `cdr3_aa1`, `TCR2` and `cdr3_aa2`.
# Returns `df` with three added character columns, in this order:
#   vjaa  - "<TCR1>;<cdr3_aa1>_<TCR2>;<cdr3_aa2>"
#   alpha - "<TCR1>;<cdr3_aa1>"
#   beta  - "<TCR2>;<cdr3_aa2>"
# paste0() writes a missing value as the literal text "NA".
create_columns <- function(df) {
  # Return the result directly rather than through a trailing assignment,
  # which would only return it invisibly.
  df %>%
    mutate(
      vjaa = paste0(TCR1, ";", cdr3_aa1, "_", TCR2, ";", cdr3_aa2),
      alpha = paste0(TCR1, ";", cdr3_aa1),
      beta = paste0(TCR2, ";", cdr3_aa2)
    )
}

2.2 Load scTCR clones

# Read the filtered contig annotations of every scTCR sample. Each object is
# named <patient>_<site>_<timepoint>; the run directory names additionally
# carry the dose label (2-5mgIpi or 5mgIpi).
sctcr_dir <- "/jsimonlab/projects/Wu/Melanoma_scTCR_Eryn/melanoma_sctcr_reprocessed_040224"

# Read outs/filtered_contig_annotations.csv from one run directory below
# `sctcr_dir`.
read_contigs <- function(run_dir) {
  read.csv(file.path(sctcr_dir, run_dir, "outs",
                     "filtered_contig_annotations.csv"))
}

# Patient 101
P101_Skin_Post3rd <- read_contigs("P101_Skin_Post3rd_2-5mgIpi_TCR_cellranger")
P101_Skin_Pre3rd <- read_contigs("P101_Skin_Pre3rd_2-5mgIpi_TCR_cellranger")
P101_Tumor_W00 <- read_contigs("P101_Tumor_W00_2-5mgIpi_TCR_cellranger")
P101_Tumor_W12 <- read_contigs("P101_Tumor_W12_2-5mgIpi_TCR_cellranger")
P101_Tumor_W20 <- read_contigs("P101_Tumor_W20_2-5mgIpi_TCR_cellranger")

# Patient 103
P103_Skin_Post3rd <- read_contigs("P103_Skin_Post3rd_2-5mgIpi_TCR_cellranger")
P103_Skin_Pre3rd <- read_contigs("P103_Skin_Pre3rd_2-5mgIpi_TCR_cellranger")
P103_Tumor_W00 <- read_contigs("P103_Tumor_W00_2-5mgIpi_TCR_cellranger")
P103_Tumor_W12 <- read_contigs("P103_Tumor_W12_2-5mgIpi_TCR_cellranger")
P103_Tumor_W20 <- read_contigs("P103_Tumor_W20_2-5mgIpi_TCR_cellranger")

# Patient 104
P104_Skin_Post3rd <- read_contigs("P104_Skin_Post3rd_2-5mgIpi_TCR_cellranger")
P104_Skin_Pre3rd <- read_contigs("P104_Skin_Pre3rd_2-5mgIpi_TCR_cellranger")
P104_Tumor_PD <- read_contigs("P104_Tumor_PD_2-5mgIpi_TCR_cellranger")

# Patient 105
P105_Skin_Post3rd <- read_contigs("P105_Skin_Post3rd_2-5mgIpi_TCR_cellranger")
P105_Skin_Pre3rd <- read_contigs("P105_Skin_Pre3rd_2-5mgIpi_TCR_cellranger")

# Patient 106 (the Pre3rd run directory carries a "fixed021324" tag)
P106_Skin_Post3rd <- read_contigs("P106_Skin_Post3rd_2-5mgIpi_TCR_cellranger")
P106_Skin_Pre3rd <- read_contigs("P106_Skin_Pre3rd_2-5mgIpi_TCR_fixed021324_cellranger")

# Patient 108
P108_Skin_Post3rd <- read_contigs("P108_Skin_Post3rd_5mgIpi_TCR_cellranger")
P108_Skin_Pre3rd <- read_contigs("P108_Skin_Pre3rd_5mgIpi_TCR_cellranger")
P108_Tumor_PD <- read_contigs("P108_Tumor_PD_5mgIpi_TCR_cellranger")

# Patient 109
P109_Skin_Pre3rd <- read_contigs("P109_Skin_Pre3rd_5mgIpi_TCR_cellranger")

# Patient 110
P110_Skin_Post3rd <- read_contigs("P110_Skin_Post3rd_5mgIpi_TCR_cellranger")
P110_Skin_Pre3rd <- read_contigs("P110_Skin_Pre3rd_5mgIpi_TCR_cellranger")

# Patient 111
P111_Skin_Post3rd <- read_contigs("P111_Skin_Post3rd_5mgIpi_TCR_cellranger")
P111_Skin_Pre3rd <- read_contigs("P111_Skin_Pre3rd_5mgIpi_TCR_cellranger")

2.3 Combine TCRs and define clones using the V + J gene + cdr3AA

# Sample labels, one per contig table, in the order passed to combineTCR().
sample_list <- c(
  "P101_Skin_Post3rd", "P101_Skin_Pre3rd",
  "P101_Tumor_W00", "P101_Tumor_W12", "P101_Tumor_W20",
  "P103_Skin_Post3rd", "P103_Skin_Pre3rd",
  "P103_Tumor_W00", "P103_Tumor_W12", "P103_Tumor_W20",
  "P104_Skin_Post3rd", "P104_Skin_Pre3rd", "P104_Tumor_PD",
  "P105_Skin_Post3rd", "P105_Skin_Pre3rd",
  "P106_Skin_Post3rd", "P106_Skin_Pre3rd",
  "P108_Skin_Post3rd", "P108_Skin_Pre3rd", "P108_Tumor_PD",
  "P109_Skin_Pre3rd",
  "P110_Skin_Post3rd", "P110_Skin_Pre3rd",
  "P111_Skin_Post3rd", "P111_Skin_Pre3rd"
)

# Look up each contig table by its label, so the tables and the labels cannot
# drift out of step (a mismatch would silently mislabel samples). mget() stops
# with an error if a table is missing; unname() keeps the list unnamed.
contig_list <- unname(mget(sample_list, inherits = TRUE))

# Blank the C gene in every contig table before combining, in line with the
# clone definition used here (V gene + J gene + CDR3 amino acids, no C gene).
contig_list <- lapply(X = contig_list, FUN = to_empty)

# Combine the contig tables into one table per sample, labelled by
# sample_list. All three filtering switches are turned off; multi-chain
# barcodes are counted and removed by hand further down.
combined_TCR <- combineTCR(
  contig_list,
  samples = sample_list,
  removeNA = FALSE,
  removeMulti = FALSE,
  filterMulti = FALSE
)

# Add the per-barcode TRA/TRB chain counts to every sample
combined_TCR <- lapply(combined_TCR, count_TRA_TRB)

# Supplementary table creation
# Per sample, count the barcodes that the multi-chain filter removes
# ("MultiChain": 2A+2B, 3 or more TRA, or 3 or more TRB) and the barcodes it
# keeps ("SingleChain").
suppTable <- do.call(rbind, combined_TCR)

suppTable %>%
  group_by(sample) %>%
  mutate(to_remove = case_when(
    TCR1_count == 2 & TCR2_count == 2 ~ "MultiChain",
    TCR1_count >= 3 ~ "MultiChain",
    TCR2_count >= 3 ~ "MultiChain",
    TRUE ~ "SingleChain"
  )) %>%
  dplyr::count(to_remove) %>%
  # Show 0 rather than NA for a sample that lacks one of the two categories.
  pivot_wider(names_from = "to_remove", values_from = "n", values_fill = 0L)
# A tibble: 25 × 3
# Groups:   sample [25]
   sample            MultiChain SingleChain
   <chr>                  <int>       <int>
 1 P101_Skin_Post3rd         36         640
 2 P101_Skin_Pre3rd          15         294
 3 P101_Tumor_W00             1         393
 4 P101_Tumor_W12             9        1289
 5 P101_Tumor_W20             1         271
 6 P103_Skin_Post3rd         33         838
 7 P103_Skin_Pre3rd          22         953
 8 P103_Tumor_W00             2         813
 9 P103_Tumor_W12            14        2733
10 P103_Tumor_W20            13        3016
# ℹ 15 more rows
# For every sample: drop the multi-chain barcodes, strip the ".NA" gene
# placeholders from TCR1 and TCR2, then add the vjaa / alpha / beta columns.
combined_TCR <- lapply(combined_TCR, function(sample_tcr) {
  sample_tcr %>%
    remove_multi_chains() %>%
    rm_na_TCR1() %>%
    rm_na_TCR2() %>%
    create_columns()
})

2.4 Add metadata

# Attach per-sample metadata parsed from the sample labels, which have the
# form <Patient>_<Site>_<Timepoint> (e.g. "P101_Tumor_W12").
combined_TCR <- combined_TCR %>%
  # Patient: everything before the first underscore
  addVariable(variable.name = "Patient",
              variables = str_replace_all(sample_list, "_.+", "")) %>%
  # Site: the middle field
  addVariable(variable.name = "Site",
              variables = str_replace_all(sample_list, ".+_(.+)_.+", "\\1")) %>%
  # Timepoint: everything after the last underscore
  addVariable(variable.name = "Timepoint",
              variables = str_replace_all(sample_list, ".+_", ""))

2.5 Save data with my custom definition of clones in the “vjaa” column

# Write the per-sample list to an .Rds file; the relative path is resolved
# against the current working directory.
saveRDS(object = combined_TCR,
        file = "sctcr_scRep_combined_TCR_skin_tumor_Part1.Rds")

2.6 Get session info

# Print the R version, platform and package versions used for this run.
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scRepertoire_2.0.0 Seurat_5.1.0       SeuratObject_5.0.2 sp_2.2-0          
 [5] lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4       
 [9] purrr_1.0.4        readr_2.1.5        tidyr_1.3.1        tibble_3.2.1      
[13] ggplot2_3.5.1      tidyverse_2.0.0   

loaded via a namespace (and not attached):
  [1] cubature_2.1.1              RcppAnnoy_0.0.22           
  [3] splines_4.3.2               later_1.4.1                
  [5] bitops_1.0-9                polyclip_1.10-7            
  [7] fastDummies_1.7.5           lifecycle_1.0.4            
  [9] globals_0.16.3              lattice_0.22-7             
 [11] MASS_7.3-60.0.1             magrittr_2.0.3             
 [13] plotly_4.10.4               rmarkdown_2.29             
 [15] httpuv_1.6.15               sctransform_0.4.1          
 [17] spam_2.11-1                 spatstat.sparse_3.1-0      
 [19] reticulate_1.42.0           cowplot_1.1.3              
 [21] pbapply_1.7-2               RColorBrewer_1.1-3         
 [23] abind_1.4-8                 zlibbioc_1.48.2            
 [25] Rtsne_0.17                  GenomicRanges_1.54.1       
 [27] ggraph_2.2.1                BiocGenerics_0.48.1        
 [29] RCurl_1.98-1.17             tweenr_2.0.3               
 [31] evmix_2.12                  GenomeInfoDbData_1.2.11    
 [33] IRanges_2.36.0              S4Vectors_0.40.2           
 [35] ggrepel_0.9.5               irlba_2.3.5.1              
 [37] listenv_0.9.1               spatstat.utils_3.1-0       
 [39] iNEXT_3.0.1                 MatrixModels_0.5-3         
 [41] goftest_1.2-3               RSpectra_0.16-2            
 [43] spatstat.random_3.3-1       fitdistrplus_1.2-2         
 [45] parallelly_1.41.0           leiden_0.4.3.1             
 [47] codetools_0.2-20            DelayedArray_0.28.0        
 [49] ggforce_0.4.2               tidyselect_1.2.1           
 [51] farver_2.1.2                viridis_0.6.5              
 [53] matrixStats_1.5.0           stats4_4.3.2               
 [55] spatstat.explore_3.3-2      jsonlite_1.8.9             
 [57] tidygraph_1.3.1             progressr_0.15.1           
 [59] ggridges_0.5.6              ggalluvial_0.12.5          
 [61] survival_3.8-3              tools_4.3.2                
 [63] stringdist_0.9.12           ica_1.0-3                  
 [65] Rcpp_1.0.14                 glue_1.8.0                 
 [67] gridExtra_2.3               SparseArray_1.2.4          
 [69] xfun_0.50                   MatrixGenerics_1.14.0      
 [71] GenomeInfoDb_1.38.8         withr_3.0.2                
 [73] fastmap_1.2.0               SparseM_1.84-2             
 [75] digest_0.6.37               timechange_0.3.0           
 [77] R6_2.6.1                    mime_0.13                  
 [79] colorspace_2.1-1            scattermore_1.2            
 [81] tensor_1.5                  spatstat.data_3.1-2        
 [83] utf8_1.2.4                  generics_0.1.3             
 [85] data.table_1.15.4           graphlayouts_1.1.1         
 [87] httr_1.4.7                  htmlwidgets_1.6.4          
 [89] S4Arrays_1.2.1              uwot_0.2.3                 
 [91] pkgconfig_2.0.3             gtable_0.3.6               
 [93] lmtest_0.9-40               SingleCellExperiment_1.24.0
 [95] XVector_0.42.0              htmltools_0.5.8.1          
 [97] dotCall64_1.2               scales_1.3.0               
 [99] Biobase_2.62.0              png_0.1-8                  
[101] spatstat.univar_3.0-0       ggdendro_0.2.0             
[103] knitr_1.49                  rstudioapi_0.17.1          
[105] rjson_0.2.23                tzdb_0.5.0                 
[107] reshape2_1.4.4              nlme_3.1-168               
[109] zoo_1.8-13                  cachem_1.1.0               
[111] KernSmooth_2.23-26          parallel_4.3.2             
[113] miniUI_0.1.1.1              pillar_1.10.1              
[115] grid_4.3.2                  vctrs_0.6.5                
[117] RANN_2.6.2                  VGAM_1.1-13                
[119] promises_1.3.2              xtable_1.8-4               
[121] cluster_2.1.8.1             evaluate_1.0.1             
[123] truncdist_1.0-2             cli_3.6.3                  
[125] compiler_4.3.2              rlang_1.1.5                
[127] crayon_1.5.3                future.apply_1.11.3        
[129] plyr_1.8.9                  stringi_1.8.4              
[131] viridisLite_0.4.2           deldir_2.0-4               
[133] munsell_0.5.1               gsl_2.1-8                  
[135] lazyeval_0.2.2              spatstat.geom_3.3-2        
[137] quantreg_6.1                Matrix_1.6-5               
[139] RcppHNSW_0.6.0              hms_1.1.3                  
[141] patchwork_1.3.0             future_1.34.0              
[143] shiny_1.9.1                 SummarizedExperiment_1.32.0
[145] evd_2.3-7.1                 ROCR_1.0-11                
[147] igraph_2.0.3                memoise_2.0.1